library(tidyverse)
## -- Attaching packages ------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
drivers <- feather::read_feather("drivers.feather")
incidents <- feather::read_feather("incidents.feather")
non_motorists <- feather::read_feather("non_motorists.feather")
combo <- feather::read_feather("combo.feather")
incident_locations <- incidents %>%
  filter(!not_in_county) %>%
  select(longitude, latitude)
kmeans_list <- lapply(1:15,function(x)kmeans(incident_locations, centers = x, nstart = 25, iter.max = 100))
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 2852800)
centroidss <- data.frame(
  centroids = 1:15,
  sum_of_squares = sapply(1:15,function(x)kmeans_list[[x]]$tot.withinss)
  )
centroidss %>%
  ggplot(aes(x = centroids, y = sum_of_squares)) +
  geom_line()

centroidss %>% knitr::kable()
centroids sum_of_squares
1 805.93049
2 304.84448
3 213.07526
4 172.06474
5 139.81327
6 117.46724
7 99.63681
8 87.33599
9 75.62231
10 66.97816
11 60.92719
12 55.57872
13 50.99439
14 47.48665
15 44.29750
incident_clusters <- incident_locations
for(x in 1:15){
  incident_clusters$cluster <- 
    as.factor(paste0("cluster", kmeans_list[[x]]$cluster))
  temp <-incident_clusters %>%
    ggplot(aes(x = longitude, y = latitude, color = cluster)) +
    geom_point()
  temp %>% print()
}

contingency_table <- function(data, var1, var2){
data %>%
  group_by(!!enquo(var1), !!enquo(var2)) %>%
  tally() %>%
  ggplot() +
  geom_bin2d(aes(x = !!enquo(var1), fill = log(n), y = !!enquo(var2))) +
  geom_text(aes(x = !!enquo(var1), label = n, y = !!enquo(var2))) +
  scale_fill_gradient(low = "#ece7f2",
                      high = "#a6bddb") +
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    panel.background = element_rect(fill = "white")
    )
}
temp <-chisq.test(as.factor(incidents$weather), as.factor(incidents$acrs_report_type))
## Warning in chisq.test(as.factor(incidents$weather),
## as.factor(incidents$acrs_report_type)): Chi-squared approximation may be
## incorrect
temp %>% broom::tidy() %>% knitr::kable()
statistic p.value parameter method
171.3451 0 24 Pearson’s Chi-squared test

p-value 3.249696710^{-24}

contingency_table(incidents, acrs_report_type, weather)

Helmets

helmet_data <- non_motorists %>%
  filter(as.logical(pedestrian_type_bicyclist)) %>%
  select(safety_equipment,injury_severity) %>%
  mutate(safety_equipment = c("no helmet","helmet")[1+as.integer(str_detect(tolower(safety_equipment), "helmet"))])

temp <-chisq.test(as.factor(helmet_data$safety_equipment), as.factor(helmet_data$injury_severity))
## Warning in chisq.test(as.factor(helmet_data$safety_equipment),
## as.factor(helmet_data$injury_severity)): Chi-squared approximation may be
## incorrect
temp %>% broom::tidy() %>% knitr::kable()
statistic p.value parameter method
7.365619 0.117783 4 Pearson’s Chi-squared test
contingency_table(helmet_data, safety_equipment,injury_severity)

Speed Limits

ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)

speed_limit_data <-drivers %>%
  select(speed_limit,injury_severity) %>%
  mutate(
    speed_limit = factor(c("Under 35", "35 - 45", "50+")[1 + findInterval(speed_limit, c(31, 46))], levels = c("Under 35", "35 - 45", "50+")),
    injury_severity = factor(injury_severity, ordered_injury)
  )

temp <-chisq.test(as.factor(speed_limit_data$speed_limit), as.factor(speed_limit_data$injury_severity))
## Warning in chisq.test(as.factor(speed_limit_data$speed_limit),
## as.factor(speed_limit_data$injury_severity)): Chi-squared approximation may be
## incorrect
temp %>% broom::tidy() %>% knitr::kable()
statistic p.value parameter method
995.2863 0 8 Pearson’s Chi-squared test

p-value 1.554337810^{-209}

contingency_table(speed_limit_data, speed_limit, injury_severity)

## Loading required package: Rcpp
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.002 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 20 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: WARNING: There aren't enough warmup iterations to fit the
## Chain 1:          three stages of adaptation as currently configured.
## Chain 1:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 1:          the given number of warmup iterations:
## Chain 1:            init_buffer = 7
## Chain 1:            adapt_window = 38
## Chain 1:            term_buffer = 5
## Chain 1: 
## Chain 1: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 1: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 1: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 1: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 1: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 1: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 1: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 1: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 1: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 1: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 1: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 1: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 10.751 seconds (Warm-up)
## Chain 1:                9.755 seconds (Sampling)
## Chain 1:                20.506 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: WARNING: There aren't enough warmup iterations to fit the
## Chain 2:          three stages of adaptation as currently configured.
## Chain 2:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 2:          the given number of warmup iterations:
## Chain 2:            init_buffer = 7
## Chain 2:            adapt_window = 38
## Chain 2:            term_buffer = 5
## Chain 2: 
## Chain 2: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 2: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 2: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 2: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 2: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 2: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 2: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 2: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 2: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 2: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 2: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 2: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 8.017 seconds (Warm-up)
## Chain 2:                8.846 seconds (Sampling)
## Chain 2:                16.863 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.001 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: WARNING: There aren't enough warmup iterations to fit the
## Chain 3:          three stages of adaptation as currently configured.
## Chain 3:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 3:          the given number of warmup iterations:
## Chain 3:            init_buffer = 7
## Chain 3:            adapt_window = 38
## Chain 3:            term_buffer = 5
## Chain 3: 
## Chain 3: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 3: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 3: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 3: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 3: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 3: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 3: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 3: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 3: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 3: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 3: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 3: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 8.967 seconds (Warm-up)
## Chain 3:                10.215 seconds (Sampling)
## Chain 3:                19.182 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.001 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: WARNING: There aren't enough warmup iterations to fit the
## Chain 4:          three stages of adaptation as currently configured.
## Chain 4:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 4:          the given number of warmup iterations:
## Chain 4:            init_buffer = 7
## Chain 4:            adapt_window = 38
## Chain 4:            term_buffer = 5
## Chain 4: 
## Chain 4: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 4: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 4: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 4: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 4: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 4: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 4: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 4: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 4: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 4: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 4: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 4: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 9.467 seconds (Warm-up)
## Chain 4:                9.49 seconds (Sampling)
## Chain 4:                18.957 seconds (Total)
## Chain 4:
## Warning: The largest R-hat is 1.11, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
plot(total_model, forecast)

prophet_plot_components(total_model, forecast)

## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: WARNING: There aren't enough warmup iterations to fit the
## Chain 1:          three stages of adaptation as currently configured.
## Chain 1:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 1:          the given number of warmup iterations:
## Chain 1:            init_buffer = 7
## Chain 1:            adapt_window = 38
## Chain 1:            term_buffer = 5
## Chain 1: 
## Chain 1: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 1: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 1: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 1: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 1: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 1: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 1: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 1: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 1: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 1: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 1: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 1: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 5.801 seconds (Warm-up)
## Chain 1:                5.54 seconds (Sampling)
## Chain 1:                11.341 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: WARNING: There aren't enough warmup iterations to fit the
## Chain 2:          three stages of adaptation as currently configured.
## Chain 2:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 2:          the given number of warmup iterations:
## Chain 2:            init_buffer = 7
## Chain 2:            adapt_window = 38
## Chain 2:            term_buffer = 5
## Chain 2: 
## Chain 2: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 2: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 2: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 2: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 2: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 2: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 2: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 2: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 2: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 2: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 2: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 2: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 6.701 seconds (Warm-up)
## Chain 2:                6.932 seconds (Sampling)
## Chain 2:                13.633 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: WARNING: There aren't enough warmup iterations to fit the
## Chain 3:          three stages of adaptation as currently configured.
## Chain 3:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 3:          the given number of warmup iterations:
## Chain 3:            init_buffer = 7
## Chain 3:            adapt_window = 38
## Chain 3:            term_buffer = 5
## Chain 3: 
## Chain 3: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 3: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 3: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 3: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 3: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 3: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 3: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 3: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 3: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 3: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 3: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 3: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 6.286 seconds (Warm-up)
## Chain 3:                6.644 seconds (Sampling)
## Chain 3:                12.93 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: WARNING: There aren't enough warmup iterations to fit the
## Chain 4:          three stages of adaptation as currently configured.
## Chain 4:          Reducing each adaptation stage to 15%/75%/10% of
## Chain 4:          the given number of warmup iterations:
## Chain 4:            init_buffer = 7
## Chain 4:            adapt_window = 38
## Chain 4:            term_buffer = 5
## Chain 4: 
## Chain 4: Iteration:  1 / 100 [  1%]  (Warmup)
## Chain 4: Iteration: 10 / 100 [ 10%]  (Warmup)
## Chain 4: Iteration: 20 / 100 [ 20%]  (Warmup)
## Chain 4: Iteration: 30 / 100 [ 30%]  (Warmup)
## Chain 4: Iteration: 40 / 100 [ 40%]  (Warmup)
## Chain 4: Iteration: 50 / 100 [ 50%]  (Warmup)
## Chain 4: Iteration: 51 / 100 [ 51%]  (Sampling)
## Chain 4: Iteration: 60 / 100 [ 60%]  (Sampling)
## Chain 4: Iteration: 70 / 100 [ 70%]  (Sampling)
## Chain 4: Iteration: 80 / 100 [ 80%]  (Sampling)
## Chain 4: Iteration: 90 / 100 [ 90%]  (Sampling)
## Chain 4: Iteration: 100 / 100 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 6.918 seconds (Warm-up)
## Chain 4:                10.483 seconds (Sampling)
## Chain 4:                17.401 seconds (Total)
## Chain 4:
## Warning: There were 11 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.51, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
plot(alcohol_model, forecast)

prophet_plot_components(alcohol_model, forecast)